library(ggplot2)
library(tidycensus)
library(tidyverse)
library(dplyr)
library(viridis)

Variables Used

# Variable dictionary for the 2018 ACS 5-year dataset, kept for looking up
# the table codes used below.
area_vars_2018 <- load_variables(year = 2018, dataset = "acs5")
  1. Total population (continuous) : tot_pop
  2. Median Income (continuous) : med_income
  3. Monthly Housing Cost (continuous) : mon_hous
  4. Percentage of the tenant population (continuous) : pct_ten
  5. Percentage of the population using public transport to work (continuous) : pct_pubtra
  6. Majority race/ethnicity of the population (non-Hispanic race groups, or Hispanic/Latino) (categorical) : maj_race
  7. Whether the majority of the population is US-born or not (categorical) : maj_citizenship

Loading 2014-2018 5 year ACS data for Los Angeles county, CA

# Named vector of ACS variable codes. The names become the column prefixes
# of the downloaded table (e.g. tot_pop -> tot_popE for the estimate).
pop_vars <- c(
  tot_pop = "B01003_001",
  # NOTE(review): B21004 appears to be tabulated by veteran status; confirm
  # this is the intended income measure (B19013_001 is median household
  # income).
  med_income = "B21004_001",
  # Median monthly housing cost in dollars. The previous code, B25104_001,
  # is the total count of occupied housing units in the monthly-housing-cost
  # table, so it measured housing units rather than cost.
  mon_hous = "B25105_001",
  tot_ten = "B07013_003",
  tot_pubtra = "B08006_008",
  white_alone = "B03002_003",
  black_AA_alone = "B03002_004",
  asian_alone = "B03002_006",
  hispanic_latino = "B03002_012",
  other = "B03002_008",
  us_born = "B05001_002"
)

# Download the selected variables for every census tract in Los Angeles
# County from the 2018 ACS 5-year survey, one row per tract (wide layout).
tractLA2018 <- get_acs(
  geography = "tract",
  state = "CA",
  county = "Los Angeles",
  year = 2018,
  survey = "acs5",
  variables = pop_vars,
  output = "wide"
)

Calculating Variables

# Derive per-tract shares and majority categories, then keep only the
# columns that the plots below use.
tractLA2018 <- tractLA2018 %>%
  mutate(
    # Every share uses the tract's total population as the denominator.
    pct_ten = tot_tenE / tot_popE,
    pct_pubtra = tot_pubtraE / tot_popE,
    pct_white = white_aloneE / tot_popE,
    pct_black = black_AA_aloneE / tot_popE,
    pct_hisp_lat = hispanic_latinoE / tot_popE,
    pct_asian = asian_aloneE / tot_popE,
    pct_other = otherE / tot_popE,
    pct_usborn = us_bornE / tot_popE,
    # A group is the majority only when it exceeds half of the population;
    # tracts where no group does fall through to "no_majority".
    maj_race = case_when(
      pct_white > 0.5 ~ "nh_white",
      pct_black > 0.5 ~ "nh_black",
      pct_asian > 0.5 ~ "nh_asian",
      pct_hisp_lat > 0.5 ~ "hs_latino",
      pct_other > 0.5 ~ "nh_other",
      TRUE ~ "no_majority"
    ),
    # A share of exactly 0.5 (or a missing share) matches neither branch and
    # is left as NA.
    maj_citizenship = case_when(
      pct_usborn > 0.5 ~ "usborn",
      pct_usborn < 0.5 ~ "immigrant"
    )
  ) %>%
  select(
    tot_popE, med_incomeE, mon_housE,
    pct_ten, pct_pubtra, maj_race, maj_citizenship
  )

tractLA2018
## # A tibble: 2,346 x 7
##    tot_popE med_incomeE mon_housE pct_ten pct_pubtra maj_race    maj_citizenship
##       <dbl>       <dbl>     <dbl>   <dbl>      <dbl> <chr>       <chr>          
##  1     5619       32500      1820   0.157    0.0114  nh_black    usborn         
##  2     4424       29242      1390   0.485    0.00475 hs_latino   usborn         
##  3     7441       26467      1886   0.193    0.0188  hs_latino   usborn         
##  4     8192       38699      2318   0.308    0       hs_latino   usborn         
##  5     7311       42620      2208   0.275    0.0146  hs_latino   usborn         
##  6     3742       28333      1334   0.622    0.0102  no_majority usborn         
##  7     3846       23549      1469   0.650    0.0109  no_majority usborn         
##  8     3908       25980      1572   0.495    0.0251  no_majority usborn         
##  9     4914       37529      1575   0.162    0.00387 no_majority usborn         
## 10     3562       45625      1530   0.608    0.0101  nh_white    usborn         
## # … with 2,336 more rows

Plot1

Plotting most of my variables in a single scatter plot

# Plot 1: median income against tenant share, with majority citizenship as
# point shape, majority race/ethnicity as colour and the housing-cost column
# as transparency.

# Drop tracts whose citizenship majority could not be determined.
tractLA2018 <- tractLA2018 %>%
  filter(!is.na(maj_citizenship))

ggplot(tractLA2018,
       aes(x = med_incomeE,
           y = pct_ten,
           shape = maj_citizenship,
           color = maj_race,
           alpha = mon_housE)) +
  geom_point(size = 1.3) +
  scale_x_continuous(name = "Median Income",
                     breaks = seq(10000, 120000, by = 20000),
                     labels = paste0(seq(10, 110, by = 20), "k")) +
  scale_y_continuous(name = "Percentage of Tenant population",
                     breaks = seq(0, 1, by = 0.1),
                     labels = paste0(seq(0, 100, by = 10), "%")) +
  # Labels are keyed by data value. Unnamed labels are assigned in the
  # alphabetical order of the categories (hs_latino, nh_asian, ...), which
  # attached the wrong description to each colour.
  scale_color_discrete(name = "Majority race/ethnicity",
                       labels = c(nh_white = "Non-Hispanic White",
                                  nh_black = "Non-Hispanic Black",
                                  hs_latino = "Hispanic Latino",
                                  nh_asian = "Non-Hispanic Asian",
                                  nh_other = "Non-Hispanic Others",
                                  no_majority = "No Majority")) +
  scale_alpha_continuous(name = "Monthly Housing Cost") +
  # Keyed for the same reason: "immigrant" sorts before "usborn", so the
  # unnamed labels were swapped.
  scale_shape(name = "Majority Citizenship",
              labels = c(usborn = "US-Born",
                         immigrant = "Immigrant")) +
  theme_bw() +
  theme(legend.key.size = unit(0.1, "cm"))
## Warning: Removed 7 rows containing missing values (geom_point).

Plot2

A scatter plot of median income and percentage of the tenant population.

# Plot 2: tenant share against median income, with point transparency
# driven by the public-transport share.

# Changes the session default theme; this plot itself is drawn with
# theme_linedraw() below.
theme_set(theme_classic())

ggplot(
  tractLA2018,
  aes(x = pct_ten, y = med_incomeE, alpha = pct_pubtra)
) +
  # The smoother is added first so the points are drawn on top of it.
  stat_smooth(color = "red", linetype = 2, size = 0.7) +
  geom_point(size = 0.7, color = "#271774") +
  scale_x_continuous(
    name = "Percentage of Tenant population",
    breaks = seq(0, 1, by = 0.1),
    labels = as.character(seq(0, 100, by = 10))
  ) +
  scale_y_continuous(
    name = "Median Income",
    breaks = seq(10000, 120000, by = 10000)
  ) +
  scale_alpha_continuous(
    name = "Percentage of Using Public Transport to Work"
  ) +
  theme_linedraw() +
  theme(
    text = element_text(size = 10),
    legend.key.size = unit(0.5, "cm")
  )
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

Plot3

A heat map of majority citizenship and majority race/ethnicity.

# Plot 3: heat map of the mean tenant share for each combination of
# majority race/ethnicity and majority citizenship.

# Drop tracts whose citizenship majority could not be determined.
tractLA2018 <- tractLA2018 %>%
  filter(!is.na(maj_citizenship))

tractLA2018 %>%
  # geom_tile() draws one tile per row, so plotting raw tracts stacks
  # thousands of tiles in each cell and only one arbitrary tract is visible.
  # Aggregate to a single value per cell first.
  group_by(maj_race, maj_citizenship) %>%
  summarise(pct_ten = mean(pct_ten, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = maj_race,
             y = maj_citizenship,
             fill = pct_ten)) +
  geom_tile() +
  # Labels are keyed by data value; unnamed labels are applied in
  # alphabetical category order and were attached to the wrong categories.
  scale_x_discrete(name = "Majority Race/Ethnicity",
                   labels = c(nh_white = "Non-H White",
                              nh_black = "Non-H Black",
                              hs_latino = "H Latino",
                              nh_asian = "Non-H Asian",
                              nh_other = "Non-H Others",
                              no_majority = "No Majority")) +
  scale_y_discrete(name = "Majority Citizenship",
                   labels = c(usborn = "US_Born",
                              immigrant = "Immigrant")) +
  # scale_fill_distiller() expects a ColorBrewer palette name, not a hex
  # colour; "Greens" is a valid name close to the green that was requested.
  scale_fill_distiller(palette = "Greens") +
  labs(fill = "Mean Percent of Tenant Population") +
  theme_classic() +
  theme(text = element_text(size = 10))
## Warning in pal_name(palette, type): Unknown palette #468966

Plot4

Scatter plot of median income and the percent of tenant population

# Plot 4: tenant share against median income with a loess trend line;
# point fill encodes majority race/ethnicity.
ggplot(tractLA2018, aes(x = pct_ten, y = med_incomeE, fill = maj_race)) +
  # Shape 21 is a filled circle: `fill` comes from the data, the outline is
  # fixed to red.
  geom_point(alpha = 0.5, shape = 21, color = "red") +
  # Spell out FALSE; `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "loess", se = FALSE, color = "#510E80") +
  # guide = "none" replaces the deprecated guide = FALSE and hides the
  # fill legend in the same way.
  scale_fill_viridis(discrete = TRUE, guide = "none", option = "B") +
  scale_x_continuous(name = "Percentage of Tenant Population",
                     breaks = seq(0, 1, by = 0.1),
                     labels = paste0(seq(0, 100, by = 10), "%")) +
  scale_y_continuous(name = "Median Income",
                     breaks = seq(10000, 120000, by = 10000),
                     labels = paste0(seq(10, 120, by = 10), "K")) +
  # The fill legend is hidden above, so this title is not displayed.
  labs(fill = "Majority Race/Ethnicity")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

Plot5

Scatter plot, drawn in a polar coordinate system, of the percentage of the tenant population against the percentage of the population using public transport to get to work

# Plot 5: public-transport share against tenant share, drawn in polar
# coordinates with the x variable mapped to the angle.

# Changes the session default theme.
theme_set(theme_bw())

ggplot(tractLA2018, aes(x = pct_pubtra, y = pct_ten)) +
  geom_point(size = 0.5, alpha = 0.5) +
  stat_smooth(linetype = 2, size = 1, color = "red") +
  scale_x_continuous(
    name = "Percentage of Using Public Transportation to Work",
    breaks = seq(0, 1, by = 0.1),
    labels = as.character(seq(0, 100, by = 10))
  ) +
  scale_y_continuous(
    name = "Percentage of Tenant Population",
    breaks = seq(0, 1, by = 0.2),
    labels = as.character(seq(0, 100, by = 20))
  ) +
  coord_polar(theta = "x")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Plot6

A dot plot of the percentage of the population using public transportation to get to work, by majority race/ethnicity

# Plot 6: public-transport share of each tract, grouped and coloured by
# majority race/ethnicity.

# Changes the session default theme.
theme_set(theme_gray())

ggplot(tractLA2018,
       aes(x = maj_race,
           y = pct_pubtra,
           color = maj_race)) +
  geom_point(size = 0.8,
             alpha = 0.5) +
  # Labels are keyed by data value. Unnamed labels are applied in the
  # alphabetical order of the categories (hs_latino, nh_asian, ...), which
  # put the wrong description under each group.
  scale_x_discrete(name = "Majority Race/Ethnicity",
                   labels = c(nh_white = "Non-H White",
                              nh_black = "Non-H Black",
                              nh_asian = "Non-H Asian",
                              hs_latino = "H Latino",
                              nh_other = "Non-H Other",
                              no_majority = "No Majority")) +
  scale_y_continuous(name = "Percentage of Using Public Transportation to Work",
                     breaks = seq(0, 1, by = 0.1),
                     labels = paste0(seq(0, 100, by = 10))) +
  # Colour repeats the x axis, so the legend is redundant.
  theme(legend.position = "none")

Plot7

A dot plot with jitter of the percent of tenant population and majority race/ethnicity

# Plot 7: tenant share of each tract, jittered within its majority
# race/ethnicity group to reduce overplotting.

# Changes the session default theme.
theme_set(theme_light())

ggplot(tractLA2018,
       aes(x = maj_race,
           y = pct_ten,
           color = maj_race)) +
  geom_point(position = "jitter",
             size = 0.5,
             alpha = 0.7) +
  # Labels are keyed by data value. Unnamed labels are applied in the
  # alphabetical order of the categories (hs_latino, nh_asian, ...), which
  # put the wrong description under each group.
  scale_x_discrete(name = "Majority Race/Ethnicity",
                   labels = c(nh_white = "Non-H White",
                              nh_black = "Non-H Black",
                              nh_asian = "Non-H Asian",
                              hs_latino = "H Latino",
                              nh_other = "Non-H Other",
                              no_majority = "No Majority")) +
  scale_y_continuous(name = "Percentage of Tenant Population",
                     breaks = seq(0, 1, by = 0.1),
                     labels = paste0(seq(0, 100, by = 10))) +
  # Colour repeats the x axis, so the legend is redundant.
  theme(legend.position = "none")

Plot8

A box plot of the percentage of tenant population and the majority race/ethnicity

# Plot 8: distribution of the tenant share within each majority
# race/ethnicity group, as box plots.

# Changes the session default theme.
theme_set(theme_dark())

ggplot(tractLA2018,
       aes(x = maj_race,
           y = pct_ten,
           color = maj_race)) +
  geom_boxplot() +
  # Labels are keyed by data value. Unnamed labels are applied in the
  # alphabetical order of the categories (hs_latino, nh_asian, ...), which
  # put the wrong description under each box.
  scale_x_discrete(name = "Majority Race/Ethnicity",
                   labels = c(nh_white = "Non-H White",
                              nh_black = "Non-H Black",
                              nh_asian = "Non-H Asian",
                              hs_latino = "H Latino",
                              nh_other = "Non-H Other",
                              no_majority = "No Majority")) +
  scale_y_continuous(name = "Percentage of Tenant Population",
                     breaks = seq(0, 1, by = 0.1),
                     labels = paste0(seq(0, 100, by = 10))) +
  # Colour repeats the x axis, so the legend is redundant.
  theme(legend.position = "none")

Plot9

A violin plot of the percentage of tenant population and the majority race/ethnicity

# Plot 9: distribution of the tenant share within each majority
# race/ethnicity group, as violins with the individual tracts jittered on
# top.
ggplot(tractLA2018,
       aes(x = maj_race,
           y = pct_ten,
           color = maj_race)) +
  geom_violin() +
  geom_point(position = "jitter",
             size = 0.3,
             alpha = 0.7) +
  # Only `color` is mapped in this plot, so only the colour scale is set
  # (the former fill scale had no aesthetic to act on).
  scale_color_viridis(discrete = TRUE) +
  # Labels are keyed by data value. Unnamed labels are applied in the
  # alphabetical order of the categories (hs_latino, nh_asian, ...), which
  # put the wrong description under each violin.
  scale_x_discrete(name = "Majority Race/Ethnicity",
                   labels = c(nh_white = "Non-H White",
                              nh_black = "Non-H Black",
                              nh_asian = "Non-H Asian",
                              hs_latino = "H Latino",
                              nh_other = "Non-H Other",
                              no_majority = "No Majority")) +
  scale_y_continuous(name = "Percentage of Tenant Population",
                     breaks = seq(0, 1, by = 0.1),
                     labels = paste0(seq(0, 100, by = 10))) +
  # Colour repeats the x axis, so the legend is redundant.
  theme(legend.position = "none")

Plot10

Stacked bar chart of the percentage of the population using public transportation to get to work, by majority citizenship

# Plot 10: public-transport shares stacked by majority race/ethnicity for
# each citizenship-majority group.

# Changes the session default theme.
theme_set(theme_classic())

# Drop tracts whose citizenship majority could not be determined.
tractLA2018 <- tractLA2018 %>%
  filter(!is.na(maj_citizenship))

ggplot(tractLA2018,
       aes(x = maj_citizenship, y = pct_pubtra, fill = maj_race)) +
  # stat = "identity" stacks the per-tract values themselves instead of
  # counting rows.
  geom_bar(stat = "identity", width = 0.5) +
  scale_y_continuous(name = "Percentage of Using Public Transportation to Work") +
  # Labels are keyed by data value: "immigrant" sorts before "usborn", so
  # unnamed labels in the order US-Born, Immigrant were swapped.
  scale_x_discrete(name = "Majority Citizenship",
                   labels = c(usborn = "US-Born",
                              immigrant = "Immigrant"))

Plot11

Bar chart of number of tracts and majority citizenship

# Plot 11: number of tracts in each citizenship-majority group, with one
# dodged bar per majority race/ethnicity.

# Changes the session default theme.
theme_set(theme_bw())

ggplot(tractLA2018, aes(x = maj_citizenship, fill = maj_race)) +
  geom_bar(position = "dodge2") +
  scale_y_continuous(name = "Number of Tracts") +
  # Labels are keyed by data value: "immigrant" sorts before "usborn", so
  # unnamed labels in the order US_born, Immigrant were swapped.
  scale_x_discrete(name = "Majority Citizenship",
                   labels = c(usborn = "US_born",
                              immigrant = "Immigrant")) +
  # name = NULL is the documented way to omit a legend title. The labels
  # are keyed by data value because unnamed labels follow the alphabetical
  # category order (hs_latino, nh_asian, ...) and were mismatched.
  scale_fill_discrete(name = NULL,
                      labels = c(nh_white = "Non-H White",
                                 nh_black = "Non-H Black",
                                 nh_asian = "Non-H Asian",
                                 hs_latino = "H Latino",
                                 nh_other = "Non-H Other",
                                 no_majority = "No Majority")) +
  labs(title = "")

Plot12

Density plot (filled area chart) of the percentage of the tenant population across census tracts

# Plot 12: density curve of the tenant share across tracts.

# Changes the session default theme.
theme_set(theme_minimal())

ggplot(tractLA2018, aes(x = pct_ten)) +
  geom_density(
    fill = "lightblue",
    color = "darkblue",
    alpha = 0.8
  ) +
  labs(
    title = "Percentage of Tenant Population of the census tracts in LA county"
  ) +
  scale_x_continuous(
    name = "Percentage of Tenant population ",
    breaks = seq(0, 1, by = 0.1),
    labels = paste0(seq(0, 100, by = 10), "%")
  )

Plot13

Count chart of the percentage of the population using public transport to get to work, by majority race/ethnicity

# Plot 13: count chart of the public-transport share by majority
# race/ethnicity; point size shows how many tracts share a position.

# Changes the session default theme.
theme_set(theme_minimal())

ggplot(tractLA2018, aes(x = maj_race, y = pct_pubtra)) +
  geom_count(color = "tomato3", show.legend = TRUE) +
  scale_y_continuous(name = "Percentage of Using Public Transportation to Work",
                     breaks = seq(0, 1, by = 0.1),
                     labels = paste0(seq(0, 100, by = 10), "%")) +
  # Labels are keyed by data value. Unnamed labels are applied in the
  # alphabetical order of the categories (hs_latino, nh_asian, ...), which
  # put the wrong description under each group.
  scale_x_discrete(name = "Majority Race/Ethnicity",
                   labels = c(nh_white = "Non-H White",
                              nh_black = "Non-H Black",
                              nh_asian = "Non-H Asian",
                              hs_latino = "H Latino",
                              nh_other = "Non-H Other",
                              no_majority = "No Majority")) +
  labs(title = "Percentage of Using Public Transport to Work by Major Race/Ethnicity",
       subtitle = "Census tracts in LA county")